'''


exec(''.join(open("/home/dalton_m/ppp/pyfiles/ANpppv12.py", encoding="utf8").readlines()[:]))
nohup python3 /home/dalton_m/ppp/pyfiles/ANpppv12.py  | tee &
'''


import os

import pandas as pd

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import sys

sys.path.append('/home/dalton_m/payload')
from basicfunctions import *


# Root directory under which one dated sub-folder of results is created per run day.
resultsloc1 = "/dataERS/eract/daltonm/results/ppp/"

# Base name of this script; used below to name the log file.
filename = 'ANpppv12'

from datetime import date
# Today's date as YYYYMMDD, used as the name of the per-day results folder.
datestr = date.today().strftime(format="%Y%m%d")
resultsloc = resultsloc1 + datestr + '/'
# exist_ok=True creates the folder only when it is missing, without the
# check-then-create race that can raise FileExistsError when two runs start
# at the same moment.
os.makedirs(resultsloc, exist_ok=True)


# Write log records to <resultsloc>/<filename>.txt with a timestamped format.
# With level=ERROR, only ERROR and CRITICAL records pass the root logger.
logging.basicConfig(filename=resultsloc + filename + '.txt', level=logging.ERROR,
                    format='%(asctime)s - %(levelname)s - %(message)s')
# Start-up test messages. At level=ERROR the debug/info/warning calls below are
# filtered out and do not reach the log file.
logging.debug('This message should go to the log file')
logging.info('So should this')
logging.warning('And this, too')
# logging.exception() is meant to be called from an exception handler; outside
# one it appends a spurious "NoneType: None" traceback to the record. Emit a
# plain ERROR-level record instead.
logging.error('And this, too')
# Redirect warnings issued through the warnings module into the logging system.
logging.captureWarnings(True)

'''
census tract x 4 digit industry
tract x 2 digit industry
county x 4 digit industry

18 to 19
19 to 20
20 to 21

###separate measure for remote work singles

control for same tract
same county - different tract
same cz - different county
'''

## first year
# Load the first-year file and keep only records whose 'firstyear' falls in
# the study window.
yearlist = [2018, 2019, 2020, 2021]
dft2 = cudf.read_csv(dataloc + 'ldb/CRfirstyear.csv', sep='|')
dft2 = dft2[dft2['firstyear'].isin(yearlist)]

# For each cohort year, attach county and tract from that year's geography
# file (file names carry the two-digit year, e.g. newgeog18.psv).
mc = ['ldb_num']
kcols = mc + ['cnty', 'tract']
dfk = []
for year in yearlist:
    cohort = dft2[dft2['firstyear'] == year]
    geog_path = dataloc + 'ldb/newgeog' + str(year - 2000) + '.psv'
    geog = cudf.read_csv(geog_path, sep='|')[kcols]
    dfk.append(cohort.merge(geog, on=mc, how='left'))

# Drop the reference to the unsplit frame before stacking the cohorts.
dft2 = None

dfk = cudf.concat(dfk)

#### need NAICS
# Attach a NAICS code to every row of dfk by looking up ldb_num in each year's
# extract returned by qcew() (a helper from basicfunctions).
# NOTE(review): own=5 presumably selects private ownership, and empty qtr/fips
# presumably mean "all" -- confirm against qcew().
qtr = ''
own = 5
fips = ''
cols = ['ldb_num', 'naics_code']
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
dfk['naics'] = np.nan
for year in yearlist:
    dfldb = qcew(yr=year, qtr=qtr, own=own, fips=fips, cols=cols)
    # ldb_num -> naics_code lookup for this year.
    tdict = dict(zip(dfldb['ldb_num'].to_pandas(), dfldb['naics_code'].to_pandas()))
    # Drop the reference to the extract before the next year's is loaded.
    dfldb = None
    dfk['tempnaics'] = dfk['ldb_num'].map(tdict)
    # Fill only rows still missing a code, so the first year in yearlist that
    # has a match for an ldb_num supplies its NAICS code.
    dfk['naics'] = dfk['naics'].to_pandas().fillna(dfk['tempnaics'].to_pandas())

# First four characters of the NAICS code, stored as float.
dfk['naics4'] = dfk['naics'].astype('str').str[:4].astype('float')
# Split cnty into its last three digits (county) and the leading digits (state).
dfk['fipscounty'] = dfk['cnty'].astype('int').astype('str').str[-3:].astype('float')
dfk['fipsstate'] = dfk['cnty'].astype('int').astype('str').str[:-3].astype('float')

################## commuting zone
# Attach a commuting-zone id ('cz') to every row of dfk by state and county.


def _fips_county(code):
    """Return the last three characters of the FIPS code as a float."""
    return float(str(code)[-3:])


def _fips_state(code):
    """Return the FIPS code without its last three characters as a float."""
    return float(str(code)[:-3])


dfg = pd.read_excel(dataloc + 'geography/' + 'commuting_zones.xls')
dfg['fipscounty'] = dfg['FIPS'].apply(_fips_county)
dfg['fipsstate'] = dfg['FIPS'].apply(_fips_state)

rdict = {'Commuting Zone ID, 2000': 'cz'}
mc = ['fipsstate', 'fipscounty']
kcols = mc + ['cz']

# Keep one cz row per (state, county) so the left merge cannot duplicate rows.
dfg = dfg.rename(columns=rdict)
dfg = dfg[kcols]
dfg = dfg.drop_duplicates(subset=mc)

cz_lookup = cudf.DataFrame(dfg)
dfk = dfk.merge(cz_lookup, on=mc, how='left')


# Count rows of dfk per tract x NAICS-4 x first year.
# NOTE(review): groupby normally drops rows with a null key, so rows lacking a
# tract or cz would be excluded here and from the county totals -- confirm
# this is intended.
dfk['counter'] = 1
tract_keys = ['fipsstate', 'cz', 'fipscounty', 'tract', 'naics4', 'firstyear']
dfk1 = dfk.groupby(tract_keys)['counter'].sum().reset_index()

# ppp loans by tract - naics
dfp = cudf.read_csv(dataloc + 'pppfiles/ANpppv12pt1.csv')

### move to county-level
# Derive the two-digit NAICS sector from the four-digit code in both frames.
for frame in (dfk1, dfp):
    frame['naics2'] = frame['naics4'].astype('str').str[:2].astype('float')

# Collapse both frames to county x NAICS-2, by first year and by loan year.
county_keys = ['fipsstate', 'cz', 'fipscounty', 'naics2']
dfk1 = dfk1.groupby(county_keys + ['firstyear'])['counter'].sum().reset_index()
loan_totals = dfp.groupby(county_keys + ['loan_year'])[['counter', 'CurrentApprovalAmount']].sum()
dfp = loan_totals.reset_index()

### need county-naics-year with 0s
# Build a balanced panel: every (state, county, naics2) combination present in
# dfk1 gets one row per year in yearlist, with counter = 0 where dfk1 has no row.
mc = ['fipsstate', 'fipscounty', 'naics2']
# Kept from the original; not used later in the visible script.
tnaics = dfp['naics2'].unique().to_pandas().tolist()
tcnty = dfk1.groupby(mc)['counter'].count().index.to_pandas().tolist()
dfc = []
for year in yearlist:
    # Build a separate frame for each year. Re-using one frame and overwriting
    # its 'firstyear' column would put the same object in the list four times,
    # so every copy would carry only the last year.
    dfyear = cudf.DataFrame(tcnty, columns=mc)
    dfyear['firstyear'] = year
    dfc.append(dfyear)

dfcnty = cudf.concat(dfc)

# The outer merge adds the combination-years that dfk1 lacks; their counter is null.
dfk1 = dfk1.merge(dfcnty, on=mc + ['firstyear'], how='outer')
## missings fill with 0
dfk1['counter'] = dfk1['counter'].to_pandas().fillna(0)
### remove bad NAICS
# NOTE(review): 92 and 99 are presumably public administration and
# unclassified -- confirm.
badnaics = [92, 99]
kcond = (dfk1['naics2'].isin(badnaics))
dfk1 = dfk1[~kcond]
kcond = (dfp['naics2'].isin(badnaics))
dfp = dfp[~kcond]

# Attach 2020 and 2021 PPP loan counts and approved amounts to the
# county x NAICS-2 x year panel.
mc = ['fipsstate', 'fipscounty', 'naics2']
ppp_cols = mc + ['counter', 'CurrentApprovalAmount']

# 2020 loans: only the count is renamed; the amount keeps its original name.
rdict = {
    'counter' : 'counter_ppp_20',
}
ppp_2020 = dfp[dfp['loan_year'] == 2020][ppp_cols].rename(columns=rdict)
dff = dfk1.merge(ppp_2020, on=mc, how='left')

# 2021 loans: both the count and the amount get a 2021-specific name.
rdict = {
    'counter' : 'counter_ppp_21',
    'CurrentApprovalAmount' : 'CurrentApprovalAmount_21'
}
ppp_2021 = dfp[dfp['loan_year'] == 2021][ppp_cols].rename(columns=rdict)
dff = dff.merge(ppp_2021, on=mc, how='left')

# Rows with no matching loan record get 0 for each PPP column.
ppp_value_cols = ['counter_ppp_20', 'CurrentApprovalAmount', 'counter_ppp_21', 'CurrentApprovalAmount_21']
for c in ppp_value_cols:
    dff[c] = dff[c].fillna(0)

# Continue in pandas from here on.
dff = dff.to_pandas()

# Reshape to one row per county x NAICS-2. 'counter' holds the 2021 count and
# counter_<year> holds the count for each earlier year in yearlist.
dfm = dff[dff['firstyear'] == 2021]
for year in yearlist[:-1]:
    dfm = dfm.merge(dff[dff['firstyear'] == year][mc + ['counter']], on=mc, how='outer',
                    suffixes=['', '_' + str(year)])

# A combination missing from an earlier year counts as 0 in that year.
for year in yearlist[:-1]:
    dfm['counter_' + str(year)] = dfm['counter_' + str(year)].fillna(0)

# Midpoint percent change: 2 * (new - old) / (new + old).
dfm['pct_1921'] = 2 * (dfm['counter'] - dfm['counter_2019']) / (dfm['counter'] + dfm['counter_2019'])
# The denominator must sum the two years being compared (2019 and 2018); the
# previous version added counter_2019 to itself.
dfm['pct_1819'] = 2 * (dfm['counter_2019'] - dfm['counter_2018']) / (dfm['counter_2019'] + dfm['counter_2018'])

# Difference between the 2019-21 and 2018-19 changes.
dfm['pct_diff'] = dfm['pct_1921'] - dfm['pct_1819']

###PPP money per employee?


# Correlations between the 2021 count, the change measures and the approved
# amount, passed to viz() (a helper from basicfunctions).
corr_cols = ['counter', 'pct_1921', 'pct_1819', 'pct_diff', 'CurrentApprovalAmount']
corr_matrix = dfm[corr_cols].corr()
viz(corr_matrix)

# Total approved amount per loan year. This is a bare expression, so the
# result is discarded unless the line is evaluated in an interactive session.
dfp.groupby('loan_year')['CurrentApprovalAmount'].sum()